bitkeeper revision 1.1379 (426ca278F5TOjFztt77FKYt8v457dg)
authorleendert@watson.ibm.com[kaf24] <leendert@watson.ibm.com[kaf24]>
Mon, 25 Apr 2005 07:55:36 +0000 (07:55 +0000)
committerleendert@watson.ibm.com[kaf24] <leendert@watson.ibm.com[kaf24]>
Mon, 25 Apr 2005 07:55:36 +0000 (07:55 +0000)
[PATCH] Assorted VMX patches

This patch contains the following VMX patches:

- ioemu: Update the address when doing a stosb/stosw/stosl. Without this patch
  stosb writes req->count times at the same location req->addr. Not exactly
  the intended semantics.

- xc_ptrace.c: Use the page_array to refer to partition memory. This allows
  you to use gdbserver on a VMX partition (and presumably partitions that use
  shadow page tables).

- dom0_ops.c: Preserve ECF_VMX_GUEST flag for gdbserver. Without it you
  cannot (re)set the VMX domain state.

- vmx.c: Added support for lmsw. Unmodified FreeBSD uses this when running
  inside a VMX partition.

Signed-off-by: Leendert van Doorn <leendert@watson.ibm.com>
===== tools/ioemu/iodev/cpu.cc 1.8 vs edited =====

tools/ioemu/iodev/cpu.cc
tools/libxc/xc_ptrace.c
xen/arch/x86/dom0_ops.c
xen/arch/x86/vmx.c
xen/include/asm-x86/vmx.h

index cbeb9ebb6877d869db15d91ee912dbfa7a297d59..3e234d5cc82ad295a3a6725bdd88edbc6b0b01a1 100644 (file)
@@ -128,15 +128,13 @@ void bx_cpu_c::dispatch_ioreq(ioreq_t *req)
                if (!req->pdata_valid) {
                        if(req->dir == IOREQ_READ){//read
                                //BX_INFO(("mmio[value]: <READ> addr:%llx, value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
-
                                for (i = 0; i < req->count; i++) {
-                                       BX_MEM_READ_PHYSICAL(req->addr, req->size, &req->u.data);
+                                       BX_MEM_READ_PHYSICAL(req->addr + (sign * i * req->size), req->size, &req->u.data);
                                }
                        } else if(req->dir == IOREQ_WRITE) {//write
                                //BX_INFO(("mmio[value]: <WRITE> addr:%llx, value:%llx, size: %llx, count: %llx\n", req->addr, req->u.data, req->size, req->count));
-
                                for (i = 0; i < req->count; i++) {
-                                       BX_MEM_WRITE_PHYSICAL(req->addr, req->size, &req->u.data);
+                                       BX_MEM_WRITE_PHYSICAL(req->addr + (sign * i * req->size), req->size, &req->u.data);
                                }
                        }
                } else {
index f4403907db24dd86f60b1c1880ad214435849f40..b7e6e89562db080f6cef5e5e2ddc6bbd4691d8f7 100644 (file)
@@ -75,7 +75,7 @@ struct gdb_regs {
        int retval = xc_domain_getfullinfo(xc_handle, domid, cpu, NULL, &ctxt[cpu]); \
        if (retval) \
            goto error_out; \
-       cr3[cpu] = ctxt[cpu].pt_base; \
+       cr3[cpu] = ctxt[cpu].pt_base; /* physical address */ \
        regs_valid[cpu] = 1; \
     } \
 
@@ -128,11 +128,12 @@ struct gdb_regs {
 
 
 static int                      xc_handle;
+static long                    nr_pages = 0;
+unsigned long                  *page_array = NULL;
 static int                      regs_valid[MAX_VIRT_CPUS];
 static unsigned long            cr3[MAX_VIRT_CPUS];
 static full_execution_context_t ctxt[MAX_VIRT_CPUS];
 
-
 /* --------------------- */
 
 static void *
@@ -140,6 +141,7 @@ map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
 {
     unsigned long pde, page;
     unsigned long va = (unsigned long)guest_va;
+    long npgs = xc_get_tot_pages(xc_handle, domid);
 
     static unsigned long  cr3_phys[MAX_VIRT_CPUS];
     static unsigned long *cr3_virt[MAX_VIRT_CPUS];
@@ -150,6 +152,21 @@ map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
     
     static int            prev_perm[MAX_VIRT_CPUS];
 
+    if (nr_pages != npgs) {
+       if (nr_pages > 0)
+           free(page_array);
+       nr_pages = npgs;
+       if ((page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL) {
+           printf("Could not allocate memory\n");
+           goto error_out;
+       }
+
+       if (xc_get_pfn_list(xc_handle, domid, page_array, nr_pages) != nr_pages) {
+               printf("Could not get the page frame list\n");
+               goto error_out;
+       }
+    }
+
     FETCH_REGS(cpu);
 
     if (cr3[cpu] != cr3_phys[cpu]) 
@@ -162,8 +179,9 @@ map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
                                             cr3_phys[cpu] >> PAGE_SHIFT)) == NULL)
            goto error_out;
     } 
-    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0)
+    if ((pde = cr3_virt[cpu][vtopdi(va)]) == 0) /* logical address */
        goto error_out;
+    pde = page_array[pde >> PAGE_SHIFT] << PAGE_SHIFT;
     if (pde != pde_phys[cpu]) 
     {
        pde_phys[cpu] = pde;
@@ -174,8 +192,9 @@ map_domain_va(unsigned long domid, int cpu, void * guest_va, int perm)
                                             pde_phys[cpu] >> PAGE_SHIFT)) == NULL)
            goto error_out;
     }
-    if ((page = pde_virt[cpu][vtopti(va)]) == 0)
+    if ((page = pde_virt[cpu][vtopti(va)]) == 0) /* logical address */
        goto error_out;
+    page = page_array[page >> PAGE_SHIFT] << PAGE_SHIFT;
     if (page != page_phys[cpu] || perm != prev_perm[cpu]) 
     {
        page_phys[cpu] = page;
@@ -330,6 +349,7 @@ xc_ptrace(enum __ptrace_request request, pid_t domid, void *addr, void *data)
            perror("dom0 op failed");
            goto error_out;
        }
+       /* FALLTHROUGH */
     case PTRACE_CONT:
     case PTRACE_DETACH:
        if (request != PTRACE_SINGLESTEP) {
index 6f18bfcd8d9216e8e172d17d0aa4727c0c030b79..35ea082e28a3e5f7a9c5d46dac2999a68f2bfd95 100644 (file)
@@ -402,6 +402,10 @@ void arch_getdomaininfo_ctxt(
         c->flags |= ECF_I387_VALID;
     if ( KERNEL_MODE(ed, &ed->arch.user_ctxt) )
         c->flags |= ECF_IN_KERNEL;
+#ifdef CONFIG_VMX
+    if (VMX_DOMAIN(ed))
+        c->flags |= ECF_VMX_GUEST;
+#endif
     memcpy(&c->fpu_ctxt,
            &ed->arch.i387,
            sizeof(ed->arch.i387));
index b9d06ac30504e67c78df34ed6b6ad61de5fcfa47..77c87c7953cc338e6a586389f82a2027890ac841 100644 (file)
@@ -640,6 +640,84 @@ error:
     return 0;
 }
 
+static int vmx_set_cr0(unsigned long value)
+{
+    struct exec_domain *d = current;
+    unsigned long old_base_mfn, mfn;
+    unsigned long eip;
+
+    /* 
+     * CR0: We don't want to lose PE and PG.
+     */
+    __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
+
+    if (value & (X86_CR0_PE | X86_CR0_PG) &&
+        !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
+        /*
+         * Enable paging
+         */
+        set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
+        /*
+         * The guest CR3 must be pointing to the guest physical.
+         */
+        if ( !VALID_MFN(mfn = phys_to_machine_mapping(
+                            d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
+             !get_page(pfn_to_page(mfn), d->domain) )
+        {
+            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx",
+                        d->arch.arch_vmx.cpu_cr3);
+            domain_crash_synchronous(); /* need to take a clean path */
+        }
+        old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
+        if (old_base_mfn)
+            put_page(pfn_to_page(old_base_mfn));
+
+        /*
+         * Now arch.guest_table points to machine physical.
+         */
+        d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+        update_pagetables(d);
+
+        VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
+                (unsigned long) (mfn << PAGE_SHIFT));
+
+        __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
+        /* 
+         * arch->shadow_table should hold the next CR3 for shadow
+         */
+        VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", 
+                d->arch.arch_vmx.cpu_cr3, mfn);
+    } else {
+        if ((value & X86_CR0_PE) == 0) {
+            __vmread(GUEST_EIP, &eip);
+            VMX_DBG_LOG(DBG_LEVEL_1,
+               "Disabling CR0.PE at %%eip 0x%lx", eip);
+           if (vmx_assist(d, VMX_ASSIST_INVOKE)) {
+               set_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+                                       &d->arch.arch_vmx.cpu_state);
+               __vmread(GUEST_EIP, &eip);
+               VMX_DBG_LOG(DBG_LEVEL_1,
+                   "Transfering control to vmxassist %%eip 0x%lx", eip);
+               return 0; /* do not update eip! */
+           }
+       } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+                                       &d->arch.arch_vmx.cpu_state)) {
+           __vmread(GUEST_EIP, &eip);
+           VMX_DBG_LOG(DBG_LEVEL_1,
+               "Enabling CR0.PE at %%eip 0x%lx", eip);
+           if (vmx_assist(d, VMX_ASSIST_RESTORE)) {
+               clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
+                                       &d->arch.arch_vmx.cpu_state);
+               __vmread(GUEST_EIP, &eip);
+               VMX_DBG_LOG(DBG_LEVEL_1,
+                   "Restoring to %%eip 0x%lx", eip);
+               return 0; /* do not update eip! */
+           }
+       }
+    }
+    return 1;
+}
+
 #define CASE_GET_REG(REG, reg)  \
     case REG_ ## REG: value = regs->reg; break
 
@@ -650,7 +728,6 @@ static int mov_to_cr(int gp, int cr, struct xen_regs *regs)
 {
     unsigned long value;
     unsigned long old_cr;
-    unsigned long eip;
     struct exec_domain *d = current;
 
     switch (gp) {
@@ -675,80 +752,8 @@ static int mov_to_cr(int gp, int cr, struct xen_regs *regs)
     switch(cr) {
     case 0: 
     {
-        unsigned long old_base_mfn, mfn;
-
-        /* 
-         * CR0:
-         * We don't want to lose PE and PG.
-         */
-        __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
-        __vmwrite(CR0_READ_SHADOW, value);
-
-        if (value & (X86_CR0_PE | X86_CR0_PG) &&
-            !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
-            /*
-             * Enable paging
-             */
-            set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
-            /*
-             * The guest CR3 must be pointing to the guest physical.
-             */
-            if ( !VALID_MFN(mfn = phys_to_machine_mapping(
-                                d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)) ||
-                 !get_page(pfn_to_page(mfn), d->domain) )
-            {
-                VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx",
-                            d->arch.arch_vmx.cpu_cr3);
-                domain_crash_synchronous(); /* need to take a clean path */
-            }
-            old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
-            if ( old_base_mfn )
-                put_page(pfn_to_page(old_base_mfn));
-
-            /*
-             * Now arch.guest_table points to machine physical.
-             */
-            d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
-            update_pagetables(d);
-
-            VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx", 
-                    (unsigned long) (mfn << PAGE_SHIFT));
-
-            __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
-            /* 
-             * arch->shadow_table should hold the next CR3 for shadow
-             */
-            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx", 
-                    d->arch.arch_vmx.cpu_cr3, mfn);
-        } else {
-            if ((value & X86_CR0_PE) == 0) {
-               __vmread(GUEST_EIP, &eip);
-                VMX_DBG_LOG(DBG_LEVEL_1,
-                       "Disabling CR0.PE at %%eip 0x%lx", eip);
-               if (vmx_assist(d, VMX_ASSIST_INVOKE)) {
-                   set_bit(VMX_CPU_STATE_ASSIST_ENABLED,
-                                               &d->arch.arch_vmx.cpu_state);
-                   __vmread(GUEST_EIP, &eip);
-                   VMX_DBG_LOG(DBG_LEVEL_1,
-                       "Transfering control to vmxassist %%eip 0x%lx", eip);
-                   return 0; /* do not update eip! */
-               }
-           } else if (test_bit(VMX_CPU_STATE_ASSIST_ENABLED,
-                                       &d->arch.arch_vmx.cpu_state)) {
-               __vmread(GUEST_EIP, &eip);
-               VMX_DBG_LOG(DBG_LEVEL_1,
-                       "Enabling CR0.PE at %%eip 0x%lx", eip);
-               if (vmx_assist(d, VMX_ASSIST_RESTORE)) {
-                   clear_bit(VMX_CPU_STATE_ASSIST_ENABLED,
-                                               &d->arch.arch_vmx.cpu_state);
-                   __vmread(GUEST_EIP, &eip);
-                   VMX_DBG_LOG(DBG_LEVEL_1,
-                       "Restoring to %%eip 0x%lx", eip);
-                   return 0; /* do not update eip! */
-               }
-           }
-       }
-        break;
+       __vmwrite(CR0_READ_SHADOW, value);
+       return vmx_set_cr0(value);
     }
     case 3: 
     {
@@ -790,8 +795,8 @@ static int mov_to_cr(int gp, int cr, struct xen_regs *regs)
                 domain_crash_synchronous(); /* need to take a clean path */
             }
             old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
-            d->arch.guest_table  = mk_pagetable(mfn << PAGE_SHIFT);
-            if ( old_base_mfn )
+            d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
+            if (old_base_mfn)
                 put_page(pfn_to_page(old_base_mfn));
             update_pagetables(d);
             /* 
@@ -893,6 +898,13 @@ static int vmx_cr_access(unsigned long exit_qualification, struct xen_regs *regs
         value &= ~X86_CR0_TS; /* clear TS */
         __vmwrite(CR0_READ_SHADOW, value);
         break;
+    case TYPE_LMSW:
+        __vmwrite(CR0_READ_SHADOW, value);
+       value = (value & ~0xF) |
+               (((exit_qualification & LMSW_SOURCE_DATA) >> 16) & 0xF) |
+               1 /* CR0.PE == 1 */;
+       return vmx_set_cr0(value);
+        break;
     default:
         __vmx_bug(regs);
         break;
index 15f4ec32f8de0207d5064f35e1e383c17f7d93d4..de79484963375f86f9869c3798eab2654909c464 100644 (file)
@@ -89,6 +89,7 @@ extern unsigned int cpu_rev;
 #define TYPE_MOV_TO_CR                  (0 << 4) 
 #define TYPE_MOV_FROM_CR                (1 << 4)
 #define TYPE_CLTS                       (2 << 4)
+#define        TYPE_LMSW                       (3 << 4)
 #define CONTROL_REG_ACCESS_REG          0x700   /* 10:8, general purpose register */
 #define REG_EAX                         (0 << 8) 
 #define REG_ECX                         (1 << 8) 
@@ -98,6 +99,7 @@ extern unsigned int cpu_rev;
 #define REG_EBP                         (5 << 8) 
 #define REG_ESI                         (6 << 8) 
 #define REG_EDI                         (7 << 8) 
+#define        LMSW_SOURCE_DATA                (0xFFFF << 16) /* 16:31 lmsw source */
 
 /*
  * Exit Qualifications for MOV for Debug Register Access